from sklearn.model_selection import train_test_split
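# data: the dataset to split
# random_state: random seed; fixing it makes the split reproducible across runs
# test_size: fraction of the data assigned to the test set (the rest becomes the training set)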

import pandas as pd

# Load the full ratings table; the path is relative to the current working
# directory.
data = pd.read_csv('./ratings.csv')

# Hold out 20% of the rows as the test set. The fixed random_state makes the
# split reproducible across runs.
train_set, test_set = train_test_split(data, test_size=0.2, random_state=42)

# Quick sanity check: preview the training rows and report the test-set size.
print(train_set.head(5))
print(len(test_set))

# Write both splits without the index column and without a header row.
# `index` and `header` are documented as booleans, so pass False explicitly
# instead of relying on None happening to be falsy.
train_set.to_csv('./train.csv', index=False, header=False)
test_set.to_csv('./test.csv', index=False, header=False)